# Source code for eLife paper Figure 2, panels A-E
#By Dr Charles Agoti
#Last updated 28 June 2022

# Load packages, set the working directory and define the shared colour palette.
#
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# wipes the global environment of whoever source()s this script, yet it does
# not detach packages or reset options, so it never guaranteed a clean session.
# Restart R to get a fresh session instead.
library(tidyverse)
library(scales)
library(lubridate)
library(artyfarty)
library(janitor)
library(ggrepel)
library(patchwork)

# NOTE(review): every live file path in the visible script is absolute, so this
# setwd() only matters for relative paths such as the commented-out
# pdf("Fig.2B.pdf") call. Confirm before removing it.
setwd("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.2/")

# Six colours passed to scale_color_manual()/scale_fill_manual() in the panels.
color_coast <- c("#FF0000", "#000000", "#00FFFF", "#FFA500", "#0000FF", "#FF00FF")

# Read the per-county case series (wide layout: one column per county).
coast_MoH_dta <- read.csv(
  "~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Data/coastal_counties_smoothed.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
names(coast_MoH_dta)

# Reshape to long layout (one row per date and county), parse the `date`
# column with the "%d-%b-%y" format, and fix the county order used for the
# legend and the colour assignment.
coast_MoH_dta <- coast_MoH_dta %>%
  pivot_longer(
    cols = c("Mombasa", "Kilifi", "Kwale", "TaitaTaveta", "TanaRiver", "Lamu"),
    names_to = "County",
    values_to = "Cases"
  ) %>%
  mutate(
    Date = as.Date(date, "%d-%b-%y"),
    County = factor(
      County,
      levels = c("Mombasa", "Kilifi", "Kwale", "TaitaTaveta", "TanaRiver", "Lamu")
    )
  )

# Figure 2A: Cases against Date, one coloured line per coastal Kenya county.
# A dashed vertical line at 2020-09-16 separates the two waves.
#
# The "Wave 1"/"Wave 2" labels are added with annotate() rather than
# geom_text(): a geom_text() layer with constant x/y/label still inherits the
# plot data and therefore draws the same label once per data row, which
# overplots the text and bloats the exported PDF. annotate() draws it once.
Fig_2A <- ggplot(coast_MoH_dta, aes(x = Date, y = Cases)) +
  geom_line(aes(color = County), size = 0.6) +
  scale_color_manual(values = color_coast) +
  labs(y = "Positive tests/million people", x = "Month in 2020/21") +
  theme_scientific() +
  scale_y_continuous(
    limits = c(0, 140),
    minor_breaks = seq(0, 140, 10),
    breaks = seq(0, 140, 20)
  ) +
  scale_x_date(breaks = "1 month", date_minor_breaks = "1 month", labels = date_format("%b")) +
  geom_vline(xintercept = as.Date("2020-09-16"), size = 0.25, linetype = "longdash", color = "black") +
  annotate("text", x = as.Date("2020-06-15"), y = 140, label = "Wave 1", size = 4) +
  annotate("text", x = as.Date("2020-12-15"), y = 140, label = "Wave 2", size = 4) +
  theme(
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11, angle = 0),
    axis.text.y = element_text(size = 11),
    strip.text.x = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, size = 11, face = "bold"),
    legend.position = c(0.15, 0.70),
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    # Fully transparent legend background so the lines stay visible behind it.
    legend.background = element_rect(fill = alpha("white", 0))
  ) +
  guides(color = guide_legend(ncol = 1, title = "County"), size = TRUE)

#pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.2/Fig.2A.pdf", width = 7.913, height =3.01, family = "Helvetica")
#print(Fig_2A)
#dev.off()
#Fig_2A

# Testing data at KWTRP, used for Figure 2B and 2C.
# Steps: parse the test date, keep rows dated before 2021-02-28, add up the
# six county columns into coast-wide positives and tests, then tag each row
# with the first day of its month and with its wave (split at 2020-09-16).
test_dta <- read.csv(
  "~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Data/daily_summaries_26_Feb_2020.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = TRUE
) %>%
  mutate(datetested = as.Date(datetested, format = "%Y-%m-%d")) %>%
  filter(datetested < as.Date("2021-02-28", format = "%Y-%m-%d")) %>%
  mutate(
    daily_coast_pos = (daily_pos_mombasa + daily_pos_kilifi + daily_pos_kwale +
      daily_pos_taita_taveta + daily_pos_tana_river + daily_pos_lamu),
    daily_coast_tests = (daily_mombasa + daily_kilifi + daily_kwale +
      daily_taita_taveta + daily_tana_river + daily_lamu)
  ) %>%
  select(datetested, daily_lamu, daily_coast_tests, daily_coast_pos) %>%
  mutate(
    Monthly = as.Date(cut(datetested, breaks = "month")),
    studyphase = ifelse(datetested < as.Date("2020-09-16"), "Wave_1", "Wave_2")
  )

# Tests done per wave, with a totals row (printed to the console).
test_dta %>%
  group_by(studyphase) %>%
  summarise(x = sum(daily_coast_tests)) %>%
  adorn_totals()

# Monthly totals of tests done and positive tests.
kwtrp_test_dta <- test_dta %>%
  group_by(Monthly) %>%
  summarise(
    Tests_done = sum(daily_coast_tests),
    Positive_tests = sum(daily_coast_pos)
  )

# Export source data for the Figure
write.csv(
  kwtrp_test_dta,
  "~/Dropbox/COVID-19/SECONDWAVE/revision/Final/Figures/Figure 2/Source_data_2_COV-19_Monthy_tests_KWTRP_26Feb2021.csv",
  row.names = FALSE,
  na = ""
)
names(kwtrp_test_dta)

# Figure 2B: dodged monthly bars of tests done and positive tests.
#
# The "Wave 1"/"Wave 2" labels use annotate() instead of geom_text(): a
# geom_text() layer with constant x/y/label inherits the plot data and draws
# the same label once per row, overplotting the text. annotate() draws it once.
Fig_2B <- kwtrp_test_dta %>%
  pivot_longer(cols = c("Tests_done", "Positive_tests"), names_to = "tests", values_to = "count") %>%
  mutate(tests = recode(tests, Tests_done = "Tests done", Positive_tests = "Positive tests")) %>%
  # Fix the level order so "Tests done" is drawn and listed first.
  mutate(tests = factor(tests, levels = c("Tests done", "Positive tests"))) %>%
  ggplot(aes(x = Monthly, y = count)) +
  geom_col(aes(fill = tests), position = "dodge", color = "black", width = 16) +
  scale_fill_manual(values = c("#2E9FDF", "#AF0076")) +
  labs(y = "Count of tests", x = "Month in 2020/21") +
  theme_scientific() +
  scale_x_date(breaks = "2 month", date_minor_breaks = "1 month", labels = date_format("%b")) +
  scale_y_continuous(
    limits = c(0, 18000),
    minor_breaks = seq(0, 18000, 1000),
    breaks = seq(0, 18000, 2000)
  ) +
  geom_vline(xintercept = as.Date("2020-09-16"), size = 0.25, linetype = "longdash", color = "black") +
  annotate("text", x = as.Date("2020-06-15"), y = 18000, label = "Wave 1", size = 4) +
  annotate("text", x = as.Date("2020-12-15"), y = 18000, label = "Wave 2", size = 4) +
  #scale_fill_manual(values=c( "white","black"))+
  theme(
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11, angle = 0),
    axis.text.y = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, size = 11, face = "bold"),
    legend.position = c(0.80, 0.80),
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.background = element_rect(fill = alpha("white", 0))
  ) +
  guides(fill = guide_legend(ncol = 1, title = "Key", title.position = "top"), size = TRUE)

#pdf("Fig.2B.pdf", width = 3.956, height = 3.01, family = "Helvetica")
#print(Fig_2B)
#dev.off()
#Fig_2B

# Number of samples sequenced across the different months - Figure 2C
# NOTE(review): `sequenced` is read and inspected here but is not referenced
# again anywhere in the visible script; confirm whether it is still needed.
sequenced <- read.csv(
  "~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Data/monthly_sequencing_2.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
) %>%
  mutate(Monthly = as.Date(Monthly, format = "%m/%d/%y"))
glimpse(sequenced)

# Bars: percentage of records per month whose sequencing_status is "yes".
# Dashed blue line and crosses: count of sequenced records, divided by 3 so it
# fits the 0-100 primary axis; the secondary axis multiplies back by 3.
#
# The "Wave 1"/"Wave 2" labels use annotate() instead of geom_text(): a
# geom_text() layer with constant x/y/label inherits the plot data and draws
# the same label once per row, overplotting the text. annotate() draws it once.
Fig.2C <- read.csv("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Data/county_lineages_dta.csv") %>%
  tabyl(month, sequencing_status) %>%
  mutate(month = as.Date(month, "%Y-%m-%d")) %>%
  adorn_totals("col") %>%
  rename(sequenced = yes, not_sequenced = no) %>%
  mutate(prop_sequenced = sequenced / Total * 100) %>%
  ggplot(aes(x = month, y = prop_sequenced)) +
  geom_col(fill = "black", color = "black", width = 12) +
  geom_line(aes(x = month, y = sequenced / 3), color = "blue", linetype = "dashed", size = 0.8) +
  geom_point(aes(x = month, y = sequenced / 3), shape = 4, color = "blue", size = 2) +
  labs(y = "Proportion positives sequenced", x = "Month in 2020/21") +
  theme_scientific() +
  scale_x_date(breaks = "2 month", date_minor_breaks = "1 month", labels = date_format("%b")) +
  geom_vline(xintercept = as.Date("2020-09-16"), size = 0.25, linetype = "longdash", color = "black") +
  annotate("text", x = as.Date("2020-06-15"), y = 100, label = "Wave 1", size = 4) +
  annotate("text", x = as.Date("2020-12-15"), y = 100, label = "Wave 2", size = 4) +
  scale_y_continuous(
    limits = c(0, 100),
    minor_breaks = seq(0, 100, 10),
    breaks = seq(0, 100, 20),
    # Inverse of the "/ 3" rescaling applied to the genome counts above.
    sec.axis = sec_axis(~ . * 3, name = "No. of genomes", breaks = seq(0, 300, 40))
  ) +
  theme(
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11, angle = 0),
    axis.text.y = element_text(size = 11),
    axis.text.y.right = element_text(size = 11, colour = "blue"),
    plot.title = element_text(hjust = 0.5, size = 11, face = "bold"),
    legend.position = c(0.75, 0.90),
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.box.background = element_blank()
  )
#pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.2/Fig.2C.pdf",width = 3.956, height = 3.01,family = "Helvetica")
#print(Fig.2C)
#dev.off()
#Fig.2C

# ---- Figure 2D ----
# Dodged bars of genome counts per county (`county_rep`) within each study
# phase. Rows with a missing Lineage are dropped before counting.
# NOTE(review): colours from color_coast are assigned in the order of the
# county_rep values; verify that this order matches the county order used for
# the same palette in Figure 2A.
Fig_2D <- read.csv("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Data/county_lineages_dta.csv") %>%
  filter(!is.na(Lineage)) %>%
  tabyl(county_rep, studyphase) %>%
  # Columns 2 and 3 of the tabyl output are gathered into long layout.
  pivot_longer(cols = 2:3, names_to = "studyphase", values_to = "Freq") %>%
  ggplot(aes(x = studyphase, y = Freq)) +
  geom_col(aes(fill = county_rep), position = "dodge", width = 0.8) +
  scale_y_continuous(
    limits = c(0, 350),
    breaks = seq(0, 350, 50),
    minor_breaks = seq(0, 350, 25)
  ) +
  scale_fill_manual(values = color_coast) +
  #coord_flip()+
  labs(y = "No. of genomes", x = "Study phase") +
  theme_scientific() +
  theme(
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11, angle = 0),
    axis.text.y = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, size = 11, face = "bold"),
    #legend.position = "side",
    legend.position = c(0.58, 0.875),
    legend.key.size = unit(0.250, "cm"),
    legend.spacing.x = unit(0.250, "cm"),
    legend.spacing.y = unit(0.250, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    strip.background = element_rect(fill = "white", color = "white"),
    panel.spacing.x = unit(1.2, "lines"),
    # Border around plotting area.
    panel.border = element_rect(fill = NULL),
    legend.box.background = element_blank()
  ) +
  guides(
    fill = guide_legend(ncol = 2, title = "County", title.position = "left"),
    size = TRUE
  )

#pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.2/Fig.2D.pdf", width = 3.956, height = 3.01,family = "Helvetica")
#print(Fig_2D)
#dev.off()
#Fig_2D


# Per-county totals used to test the association between the number of cases
# and the number of genomes sequenced.
counties <- c("Mombasa", "Kilifi", "Kwale", "Taita Taveta", "Tana River", "Lamu")
case_count <- c(8450, 2458, 436, 855, 106, 350)
no_sequenced <- c(367, 119, 68, 139, 11, 43)
county_sequenced <- data.frame(counties, case_count, no_sequenced)

# Pearson product-moment correlation between case count and genomes sequenced.
corr <- cor.test(
  x = county_sequenced$case_count,
  y = county_sequenced$no_sequenced,
  method = "pearson"
)
corr

# Figure 2E: genomes sequenced against total case count, one point per county,
# with a linear-model smoother.
# The annotation is built from `corr` so that the method named on the plot is
# the one actually computed above. The label previously read "Spearman" while
# the test (and the 0.96 it reported) was Pearson.
Fig_2E <- read.csv("~/Dropbox/COVID-19/SECONDWAVE/revision/Final/Figures/Figure 2/Source_data_5_genomes_county_casecount_26Feb2021.csv") %>%
  ggplot(aes(x = case_count, y = no_sequenced)) +
  geom_point(aes(size = no_sequenced, fill = County), shape = 21) +
  geom_smooth(method = lm) +
  scale_fill_manual(values = color_coast) +
  #geom_text_repel(aes(label = county),box.padding   = 0.35, point.padding = 0.5,segment.color = 'black',max.overlaps=15)+
  annotate(
    geom = "text", x = 3000, y = 400,
    label = sprintf("Pearson \n correlation = %.2f", unname(corr$estimate)),
    color = "blue", size = 3, fontface = "bold"
  ) +
  labs(x = " Total case count", y = "No. of genomes") +
  theme_scientific() +
  theme(
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11, angle = 0),
    axis.text.y = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, size = 11, face = "bold"),
    #legend.position = c(0.75, 0.90),
    legend.position = "right",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.25, "cm"),
    legend.spacing.y = unit(0.25, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.box.background = element_blank()
  ) +
  guides(
    size = guide_legend(title = "Genome count", ncol = 1, title.position = "top"),
    fill = guide_legend(title = "County", ncol = 1, title.position = "top")
  )

#pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.2/Fig.2E.pdf", width = 3.956, height = 3.01, family = "Helvetica")
#print(Fig_2E)
#dev.off()
#Fig_2E

# Assemble the composite figure with patchwork: panels A and B on the top row,
# panels C, D and E on the bottom row.
Figure_2 <- (Fig_2A | Fig_2B) / (Fig.2C | Fig_2D | Fig_2E)

# Write the composite to PDF, then also display it on the current device.
pdf(
  "~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.2/Figure 2.pdf",
  width = 12,
  height = 7.02,
  family = "Helvetica"
)
print(Figure_2)
dev.off()
Figure_2
